# -*- coding: utf-8 -*-
import urllib2
import time
from bs4 import BeautifulSoup
# Page fetched when this file is run as a script (passed to parseHouseNum by
# main and read directly by parseHouseNumForSubArea).
# Presumably the Beijing ("bj") Lianjia home page -- confirm it still serves
# the "house-num" block the parsers look for.
url = "http://bj.lianjia.com/"

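# MySQL schema of the table intended to store the scraped listing counts.
# The script does not write to it yet; it only prints the number.
#CREATE TABLE IF NOT EXISTS `tb_house_num` ( `id` int(11) NOT NULL , `number` varchar(255) DEFAULT NULL, `date` DATE NULL,PRIMARY KEY (`id`)) ENGINE=InnoDB  DEFAULT CHARSET=utf8;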


def parseHouseNum(url):
    """Print the number of second-hand homes for sale shown on a page.

    Downloads ``url``, takes the first ``<div class="house-num">`` in the
    document and, for every ``<li>`` inside it whose text contains the
    listing-count label, prints that text with the label prefix and the
    trailing unit suffix stripped (i.e. just the number).

    Args:
        url: Address of the page to download.

    Returns:
        None. The number is written to stdout. Nothing is printed when the
        page contains no ``house-num`` div or no ``<li>`` with the label.

    Raises:
        Errors raised by ``urllib2.urlopen`` / ``read`` (for example
        ``urllib2.URLError``) are not caught and propagate to the caller.
    """
    # Label text: "Beijing Lianjia genuine second-hand homes currently for sale".
    label = u'北京链家真实在售二手房'

    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the HTTP connection even when reading fails.
        response.close()

    soup = BeautifulSoup(content, "html.parser")
    houseNumDiv = soup.find_all("div", class_="house-num", limit=1)
    if not houseNumDiv:
        # No such block on the page (layout change, error page, ...):
        # behave like the "no matching <li>" case instead of raising a
        # bare IndexError from houseNumDiv[0].
        return

    for li in houseNumDiv[0].find_all("li"):
        text = li.text
        if label in text:
            # Strip the label (with its trailing space) and the " 套" unit
            # suffix so only the count remains.
            print(text.replace(u'北京链家真实在售二手房 ', "").replace(u' 套', ""))
            # TODO: persist the count into `tb_house_num` (`number`, `date`)
            # instead of only printing it; use a parameterized query rather
            # than building the INSERT statement by string concatenation.

def parseHouseNumForSubArea():
    """Print the for-sale listing count for the module-level ``url``.

    Takes no arguments and returns None; the number is written to stdout.

    NOTE(review): despite its name, this function performs no per-sub-area
    parsing. It reports exactly what ``parseHouseNum(url)`` reports for the
    global ``url``, so it delegates to that function to keep the scraping
    logic in a single place.
    """
    parseHouseNum(url)
    


def main():
    """Script entry point: print the listing count for the module-level ``url``."""
    parseHouseNum(url=url)

# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
